<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"><title>AI笔记 | zapqwe block</title><meta name="author" content="laptony"><meta name="copyright" content="laptony"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="对AI的一些语法总结笔记">
<meta property="og:type" content="article">
<meta property="og:title" content="AI笔记">
<meta property="og:url" content="https://sswd123.gitee.io/posts/10059/index.html">
<meta property="og:site_name" content="zapqwe block">
<meta property="og:description" content="对AI的一些语法总结笔记">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/A547B66ABDA63D737A9376686FDA3736.jpg">
<meta property="article:published_time" content="2023-01-09T09:30:17.000Z">
<meta property="article:modified_time" content="2023-01-20T09:36:11.006Z">
<meta property="article:author" content="laptony">
<meta property="article:tag" content="AI">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/A547B66ABDA63D737A9376686FDA3736.jpg"><link rel="shortcut icon" href="/img/favicon.png"><link rel="canonical" href="https://sswd123.gitee.io/posts/10059/"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//fonts.googleapis.com" crossorigin=""/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fancyapps/ui/dist/fancybox.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Titillium+Web&amp;display=swap" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = { 
  root: '/',
  algolia: undefined,
  localSearch: {"path":"/search.xml","languages":{"hits_empty":"找不到您查询的内容：${query}"}},
  translate: undefined,
  noticeOutdate: undefined,
  highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":false},
  copy: {
    success: '复制成功',
    error: '复制错误',
    noSupport: '浏览器不支持'
  },
  relativeDate: {
    homepage: false,
    post: false
  },
  runtime: '天',
  date_suffix: {
    just: '刚刚',
    min: '分钟前',
    hour: '小时前',
    day: '天前',
    month: '个月前'
  },
  copyright: undefined,
  lightbox: 'fancybox',
  Snackbar: undefined,
  source: {
    justifiedGallery: {
      js: 'https://cdn.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.js',
      css: 'https://cdn.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.css'
    }
  },
  isPhotoFigcaption: false,
  islazyload: false,
  isAnchor: false
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
  title: 'AI笔记',
  isPost: true,
  isHome: false,
  isHighlightShrink: false,
  isToc: true,
  postUpdate: '2023-01-20 17:36:11'
}</script><noscript><style type="text/css">
  #nav {
    opacity: 1
  }
  .justified-gallery img {
    opacity: 1
  }

  #recent-posts time,
  #post-meta time {
    display: inline !important
  }
</style></noscript><script>(win=>{
    win.saveToLocal = {
      set: function setWithExpiry(key, value, ttl) {
        if (ttl === 0) return
        const now = new Date()
        const expiryDay = ttl * 86400000
        const item = {
          value: value,
          expiry: now.getTime() + expiryDay,
        }
        localStorage.setItem(key, JSON.stringify(item))
      },

      get: function getWithExpiry(key) {
        const itemStr = localStorage.getItem(key)

        if (!itemStr) {
          return undefined
        }
        const item = JSON.parse(itemStr)
        const now = new Date()

        if (now.getTime() > item.expiry) {
          localStorage.removeItem(key)
          return undefined
        }
        return item.value
      }
    }
  
    win.getScript = url => new Promise((resolve, reject) => {
      const script = document.createElement('script')
      script.src = url
      script.async = true
      script.onerror = reject
      script.onload = script.onreadystatechange = function() {
        const loadState = this.readyState
        if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
        script.onload = script.onreadystatechange = null
        resolve()
      }
      document.head.appendChild(script)
    })
  
      win.activateDarkMode = function () {
        document.documentElement.setAttribute('data-theme', 'dark')
        if (document.querySelector('meta[name="theme-color"]') !== null) {
          document.querySelector('meta[name="theme-color"]').setAttribute('content', '#0d0d0d')
        }
      }
      win.activateLightMode = function () {
        document.documentElement.setAttribute('data-theme', 'light')
        if (document.querySelector('meta[name="theme-color"]') !== null) {
          document.querySelector('meta[name="theme-color"]').setAttribute('content', '#ffffff')
        }
      }
      const t = saveToLocal.get('theme')
    
          if (t === 'dark') activateDarkMode()
          else if (t === 'light') activateLightMode()
        
    const detectApple = () => {
      if(/iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)){
        document.documentElement.classList.add('apple')
      }
    }
    detectApple()
    })(window)</script><!-- -音乐--><div class="aplayer" data-id="7455077979" data-server="tencent" data-type="playlist" data-fixed="true" data-listFolded="false" data-order="random" data-preload="none"></div><link rel="stylesheet" href="https://cdn.bootcss.com/aplayer/1.10.1/APlayer.min.css"><script src="https://cdn.bootcss.com/aplayer/1.10.1/APlayer.min.js"></script><script src="https://cdn.jsdelivr.net/npm/meting@1.2.0/dist/Meting.min.js"></script><link rel="stylesheet" href="/css/font.css"><!-- hexo injector head_end start --><link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Zfour/Butterfly-double-row-display@1.00/cardlistpost.min.css"/>
<style>#recent-posts > .recent-post-item >.recent-post-info > .article-meta-wrap > .tags:before {content:"\A";
  white-space: pre;}#recent-posts > .recent-post-item >.recent-post-info > .article-meta-wrap > .tags > .article-meta__separator{display:none}</style>
<!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head><body><div id="loading-box"><div class="loading-left-bg"></div><div class="loading-right-bg"></div><div class="spinner-box"><div class="configure-border-1"><div class="configure-core"></div></div><div class="configure-border-2"><div class="configure-core"></div></div><div class="loading-word">加载中...</div></div></div><script>const preloader = {
  endLoading: () => {
    document.body.style.overflow = 'auto';
    document.getElementById('loading-box').classList.add("loaded")
  },
  initLoading: () => {
    document.body.style.overflow = '';
    document.getElementById('loading-box').classList.remove("loaded")

  }
}
window.addEventListener('load',()=> { preloader.endLoading() })

if (true) {
  document.addEventListener('pjax:send', () => { preloader.initLoading() })
  document.addEventListener('pjax:complete', () => { preloader.endLoading() })
}</script><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/11.gif" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">12</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">9</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">3</div></a></div><hr/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-homef"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 归档</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/photos/"><i class="fa-fw fas fa-camera"></i><span> 图库</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> 关于</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li></ul></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg" id="page-header" style="background-image: url('https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/A547B66ABDA63D737A9376686FDA3736.jpg')"><nav id="nav"><span id="blog_name"><a id="site-name" href="/">zapqwe block</a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-homef"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 归档</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> Tags</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/photos/"><i class="fa-fw fas fa-camera"></i><span> 图库</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-heart"></i><span> 关于</span></a></div><div class="menus_item"><a class="site-page group" href="javascript:void(0);"><i class="fa-fw fas fa-list"></i><span> List</span><i class="fas fa-chevron-down"></i></a><ul class="menus_item_child"><li><a class="site-page child" href="/music/"><i class="fa-fw fas fa-music"></i><span> Music</span></a></li></ul></div></div><div id="toggle-menu"><a class="site-page"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="post-info"><h1 class="post-title">AI笔记</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2023-01-09T09:30:17.000Z" title="发表于 2023-01-09 17:30:17">2023-01-09</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2023-01-20T09:36:11.006Z" title="更新于 2023-01-20 17:36:11">2023-01-20</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E7%AC%94%E8%AE%B0/">笔记</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-wordcount"><i class="far fa-clock fa-fw post-meta-icon"></i><span class="post-meta-label">阅读时长:</span><span>1分钟</span></span><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="AI笔记"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article class="post-content" id="article-container"><h2 id="神经网络深度学习"><a href="#神经网络深度学习" class="headerlink" title="神经网络深度学习"></a>神经网络深度学习</h2><p>采集数据（脏数据去噪）+定义问题（优化目标）+模型搭建（特征+模型），评估优化，模型应用A&#x2F;B(test)</p>
<h3 id="数据"><a href="#数据" class="headerlink" title="数据"></a>数据</h3><p>3维：RGB（宽x高x通道）<br>4维：一个RGB批量（批量大小x宽x高x通道）<br>5维：一个视频批量（批量大小x时间x宽x高x通道）</p>
<h3 id="卷积神经网络："><a href="#卷积神经网络：" class="headerlink" title="卷积神经网络："></a>卷积神经网络：</h3><p>解决参数过多问题（过拟合，收敛到较差的局部值）<br>先验知识：局部连接（神经元不用与每个像素点连接图像区域性），参数共享（特征与位置无关）<br>公式：输出尺寸&#x3D;（输入尺寸-P）&#x2F;S+1<br>参数：<br>1.size<br>2.步长stride<br>3.padding填充,输出与输入size 一样 ，若卷积核size&#x3D;3 则padding 2<br>4.通道：多个卷积核（提取多个特征）<br>参数数目：输入通道数<em>输出通道数</em>卷积核长<em>卷积核宽<br>输入三通道输出192通道卷积核大小3</em>3 则参数3<em>（3</em>3）*192<br>激活函数：非线性，w矩阵乘法<br>池化操作：最大或平均</p>
<h2 id="强化学习"><a href="#强化学习" class="headerlink" title="强化学习"></a>强化学习</h2><p>b站王树森 <a target="_blank" rel="noopener" href="https://www.bilibili.com/video/BV12o4y197US/?spm_id_from=333.999.0.0">https://www.bilibili.com/video/BV12o4y197US/?spm_id_from=333.999.0.0</a></p>
<h3 id="概率论"><a href="#概率论" class="headerlink" title="概率论"></a>概率论</h3><p>高斯概率分布：<br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469061363-92728b64-fa29-49c5-a94e-f5f0b5e187e4.jpeg"><br>概率密度积分：<br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469079732-a868eda1-234c-4e0f-a732-be4a27648a36.jpeg"></p>
<p><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469144538-dd4d151e-99ac-4fb7-8782-36c11aa749e4.jpeg"></p>
<h3 id="总览"><a href="#总览" class="headerlink" title="总览"></a>总览</h3><p>强化学习框架:<br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469189813-dd1c72a2-3ace-40e1-ba7c-f6902551454c.jpeg"></p>
<p>强化学习概念：<br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469240423-f35843d8-371f-4a99-8e02-82886ac9ec38.jpeg"></p>
<p>随机性来源：行为随机性（Π），环境随机性（状态转移）</p>
<p>state-action-reward 三元组</p>
<h3 id="分类"><a href="#分类" class="headerlink" title="分类"></a>分类</h3><p>通过价值选行为：Q-Learning,Sarsa,Deep-Q-network（TD算法，动作为动作价值函数最大的动作）<br>直接选行为：Policy gradients （策略梯度，适用连续空间，行动不确定）<br>actor-critic(policy network + value network)<br>想象环境并从中学习：model based RL<br>model-based-RL:对环境建模<br>off-line learning，on-line learning</p>
<h3 id="马尔可夫知识"><a href="#马尔可夫知识" class="headerlink" title="马尔可夫知识"></a>马尔可夫知识</h3><p><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_181130_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_181509_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_181916_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_182107_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_182647_tv.danmaku.bili.jpg"></p>
<h3 id="贝尔曼算法"><a href="#贝尔曼算法" class="headerlink" title="贝尔曼算法"></a>贝尔曼算法</h3><p><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/BA92D9FA4C058429E9459B2F6919F9B3.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/14FB58773126E79946958CAB08245479.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/8509D810870CDFB6CECC51E97328E864.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/F960D94379339714FA1294C20FC4668C.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/412D1233FEDBE1B62E2CFE1AE10C8BA9.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/C9BAD6325DE30EB15F45E1D9806B41B6.jpg"></p>
<h3 id="TD算法流程"><a href="#TD算法流程" class="headerlink" title="TD算法流程"></a>TD算法流程</h3><p><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221110_144405_tv.danmaku.bili.jpg.jpg"></p>
<h3 id="策略梯度算法"><a href="#策略梯度算法" class="headerlink" title="策略梯度算法"></a>策略梯度算法</h3><p>max状态价值函数的期望<br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20230110_164220_tv.danmaku.bili.jpg.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_110113_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221110_190328_tv.danmaku.bili.jpg.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221110_191103_tv.danmaku.bili.jpg.jpg"></p>
<h3 id="Actor-critic"><a href="#Actor-critic" class="headerlink" title="Actor-critic"></a>Actor-critic</h3><p><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/Screenshot_20221111_110943_tv.danmaku.bili.jpg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670468830722-11414b13-70eb-4fea-a18d-2eac215fd088.jpeg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670468864736-8b762a54-44ff-4e40-9abd-3a6f02437947.jpeg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670468886475-8af953b5-5077-4b2d-ba1e-83696d023912.jpeg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670468913259-b8d12eec-c3e0-47c6-bf4a-389dcd4c7ae8.jpeg"><br><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/1670469261446-03be5184-4329-4f60-95f7-bd5639c50970.jpeg"></p>
<p>PPO算法</p>
<h2 id="创新点"><a href="#创新点" class="headerlink" title="创新点"></a>创新点</h2><p>移动窗口<br>对比学习<br>有自己独特的观点</p>
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta">文章作者: </span><span class="post-copyright-info"><a href="https://sswd123.gitee.io">laptony</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta">文章链接: </span><span class="post-copyright-info"><a href="https://sswd123.gitee.io/posts/10059/">https://sswd123.gitee.io/posts/10059/</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta">版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外，均采用 <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="https://sswd123.gitee.io" target="_blank">zapqwe block</a>！</span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/AI/">AI</a></div><div class="post_share"><div class="social-share" data-image="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/A547B66ABDA63D737A9376686FDA3736.jpg" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/butterfly-extsrc/sharejs/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc/sharejs/dist/js/social-share.min.js" defer></script></div></div><div class="post-reward"><div class="reward-button"><i class="fas fa-qrcode"></i> 打赏</div><div class="reward-main"><ul class="reward-all"><li class="reward-item"><a href="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/13.jpg" target="_blank"><img class="post-qr-code-img" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/13.jpg" alt="微信"/></a><div class="post-qr-code-desc">微信</div></li><li class="reward-item"><a href="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/12.jpg" target="_blank"><img class="post-qr-code-img" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/12.jpg" alt="支付宝"/></a><div class="post-qr-code-desc">支付宝</div></li></ul></div></div><nav class="pagination-post" id="pagination"><div class="prev-post pull-left"><a href="/posts/biyesheji/"><img class="prev-cover" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/20230104211136.png" onerror="onerror=null;src='/img/404.jpg'" alt="cover of previous post"><div class="pagination-info"><div class="label">上一篇</div><div class="prev_info">毕业设计</div></div></a></div><div class="next-post pull-right"><a href="/posts/15871/"><img class="next-cover" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/5af17f7f881b11ebb6edd017c2d2eca2.jpg" onerror="onerror=null;src='/img/404.jpg'" alt="cover of next post"><div class="pagination-info"><div class="label">下一篇</div><div class="next_info">网络知识笔记</div></div></a></div></nav><div class="relatedPosts"><div class="headline"><i class="fas fa-thumbs-up fa-fw"></i><span>相关推荐</span></div><div class="relatedPosts-list"><div><a href="/posts/27840/" title="huggingface_transformer笔记"><img class="cover" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/5af17f7f881b11ebb6edd017c2d2eca2.jpg" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2023-01-20</div><div class="title">huggingface_transformer笔记</div></div></a></div><div><a href="/posts/29414/" title="知识增强的文本生成报告笔记"><img class="cover" src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/5af17f7f881b11ebb6edd017c2d2eca2.jpg" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2023-01-14</div><div class="title">知识增强的文本生成报告笔记</div></div></a></div></div></div><hr/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="fas fa-comments fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div class="vcomment" id="vcomment"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/11.gif" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/></div><div class="author-info__name">laptony</div><div class="author-info__description">敏于思考，锐于实现</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">12</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">9</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">3</div></a></div><a id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/zapqqqwe"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/zapqqqwe" target="_blank" title="Github"><i class="fas fa-github"></i></a><a class="social-icon" href="mailto:2496524403@qq.com" target="_blank" title="Email"><i class="fas fa-envelope"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">折腾中学会知识，互访中找到友情，写作中读懂人生，坚持中找到方向。</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0"><span class="toc-number">1.</span> <span class="toc-text">神经网络深度学习</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%95%B0%E6%8D%AE"><span class="toc-number">1.1.</span> <span class="toc-text">数据</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%EF%BC%9A"><span class="toc-number">1.2.</span> <span class="toc-text">卷积神经网络：</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0"><span class="toc-number">2.</span> <span class="toc-text">强化学习</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%A6%82%E7%8E%87%E8%AE%BA"><span class="toc-number">2.1.</span> <span class="toc-text">概率论</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%80%BB%E8%A7%88"><span class="toc-number">2.2.</span> <span class="toc-text">总览</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%88%86%E7%B1%BB"><span class="toc-number">2.3.</span> <span class="toc-text">分类</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E9%A9%AC%E5%B0%94%E5%8F%AF%E5%A4%AB%E7%9F%A5%E8%AF%86"><span class="toc-number">2.4.</span> <span class="toc-text">马尔可夫知识</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E8%B4%9D%E5%B0%94%E6%9B%BC%E7%AE%97%E6%B3%95"><span class="toc-number">2.5.</span> <span class="toc-text">贝尔曼算法</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#TD%E7%AE%97%E6%B3%95%E6%B5%81%E7%A8%8B"><span class="toc-number">2.6.</span> <span class="toc-text">TD算法流程</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E7%AD%96%E7%95%A5%E6%A2%AF%E5%BA%A6%E7%AE%97%E6%B3%95"><span class="toc-number">2.7.</span> <span class="toc-text">策略梯度算法</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#Actor-critic"><span class="toc-number">2.8.</span> <span class="toc-text">Actor-critic</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%88%9B%E6%96%B0%E7%82%B9"><span class="toc-number">3.</span> <span class="toc-text">创新点</span></a></li></ol></div></div></div></div></main><footer id="footer" style="background-image: url('https://www-1305197828.cos.ap-beijing.myqcloud.com/imgs/A547B66ABDA63D737A9376686FDA3736.jpg')"><div id="footer-wrap"><div class="copyright">&copy;2022 - 2023  <i id="heartbeat" class="fa fas fa-heartbeat"></i> laptony</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div><div class="footer_custom_text">Hi, welcome to my  <a href="https://sswd123.gitee.io/">blog</a>!</div></div><head><link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/HCLonely/images@master/others/heartbeat.min.css"></head></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comments"></i></a><button id="go-up" type="button" title="回到顶部"><i class="fas fa-arrow-up"></i></button></div></div><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span>  数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><hr/><div id="local-search-results"></div></div></div><div id="search-mask"></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui/dist/fancybox.umd.min.js"></script><script>function panguFn () {
  if (typeof pangu === 'object') pangu.autoSpacingPage()
  else {
    getScript('https://cdn.jsdelivr.net/npm/pangu/dist/browser/pangu.min.js')
      .then(() => {
        pangu.autoSpacingPage()
      })
  }
}

function panguInit () {
  if (false){
    GLOBAL_CONFIG_SITE.isPost && panguFn()
  } else {
    panguFn()
  }
}

document.addEventListener('DOMContentLoaded', panguInit)</script><script src="/js/search/local-search.js"></script><div class="js-pjax"><script>(() => {
  const $mermaidWrap = document.querySelectorAll('#article-container .mermaid-wrap')
  if ($mermaidWrap.length) {
    window.runMermaid = () => {
      window.loadMermaid = true
      const theme = document.documentElement.getAttribute('data-theme') === 'dark' ? 'dark' : 'default'

      Array.from($mermaidWrap).forEach((item, index) => {
        const mermaidSrc = item.firstElementChild
        const mermaidThemeConfig = '%%{init:{ \'theme\':\'' + theme + '\'}}%%\n'
        const mermaidID = 'mermaid-' + index
        const mermaidDefinition = mermaidThemeConfig + mermaidSrc.textContent
        mermaid.mermaidAPI.render(mermaidID, mermaidDefinition, (svgCode) => {
          mermaidSrc.insertAdjacentHTML('afterend', svgCode)
        })
      })
    }

    const loadMermaid = () => {
      window.loadMermaid ? runMermaid() : getScript('https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js').then(runMermaid)
    }

    window.pjax ? loadMermaid() : document.addEventListener('DOMContentLoaded', loadMermaid)
  }
})()</script><script>function loadValine () {
  function initValine () {
    const valine = new Valine(Object.assign({
      el: '#vcomment',
      appId: 'ouV9RVxe71Vu4hdUP93QtOs1-gzGzoHsz',
      appKey: 'laBz1asytkOHj9NfuZVJogqf',
      avatar: 'monsterid',
      serverURLs: '',
      emojiMaps: "",
      path: window.location.pathname,
      visitor: false
    }, null))
  }

  if (typeof Valine === 'function') initValine() 
  else getScript('https://cdn.jsdelivr.net/npm/valine/dist/Valine.min.js').then(initValine)
}

if ('Valine' === 'Valine' || !false) {
  if (false) btf.loadComment(document.getElementById('vcomment'),loadValine)
  else setTimeout(loadValine, 0)
} else {
  function loadOtherComment () {
    loadValine()
  }
}</script></div><script src="https://cdn.jsdelivr.net/npm/blueimp-md5/js/md5.min.js"></script><script>window.addEventListener('load', () => {
  const changeContent = (content) => {
    if (content === '') return content

    content = content.replace(/<img.*?src="(.*?)"?[^\>]+>/ig, '[图片]') // replace image link
    content = content.replace(/<a[^>]+?href=["']?([^"']+)["']?[^>]*>([^<]+)<\/a>/gi, '[链接]') // replace url
    content = content.replace(/<pre><code>.*?<\/pre>/gi, '[代码]') // replace code
    content = content.replace(/<[^>]+>/g,"") // remove html tag

    if (content.length > 150) {
      content = content.substring(0,150) + '...'
    }
    return content
  }

  const getIcon = (icon, mail) => {
    if (icon) return icon
    let defaultIcon = '?d=monsterid'
    let iconUrl = `https://gravatar.loli.net/avatar/${md5(mail.toLowerCase()) + defaultIcon}`
    return iconUrl
  }

  const generateHtml = array => {
    let result = ''

    if (array.length) {
      for (let i = 0; i < array.length; i++) {
        result += '<div class=\'aside-list-item\'>'

        if (true) {
          const name = 'src'
          result += `<a href='${array[i].url}' class='thumbnail'><img ${name}='${array[i].avatar}' alt='${array[i].nick}'></a>`
        }

        result += `<div class='content'>
        <a class='comment' href='${array[i].url}' title='${array[i].content}'>${array[i].content}</a>
        <div class='name'><span>${array[i].nick} / </span><time datetime="${array[i].date}">${btf.diffDate(array[i].date, true)}</time></div>
        </div></div>`
      }
    } else {
      result += '没有评论'
    }

    let $dom = document.querySelector('#card-newest-comments .aside-list')
    $dom.innerHTML= result
    window.lazyLoadInstance && window.lazyLoadInstance.update()
    window.pjax && window.pjax.refresh($dom)
  }

  const getComment = () => {
    const serverURL = 'https://ouV9RVxe.api.lncldglobal.com'

    var settings = {
      "method": "GET",
      "headers": {
        "X-LC-Id": 'ouV9RVxe71Vu4hdUP93QtOs1-gzGzoHsz',
        "X-LC-Key": 'laBz1asytkOHj9NfuZVJogqf',
        "Content-Type": "application/json"
      },
    }

    fetch(`${serverURL}/1.1/classes/Comment?limit=6&order=-createdAt`,settings)
      .then(response => response.json())
      .then(data => {
        const valineArray = data.results.map(function (e) {
          return {
            'avatar': getIcon(e.QQAvatar, e.mail),
            'content': changeContent(e.comment),
            'nick': e.nick,
            'url': e.url + '#' + e.objectId,
            'date': e.updatedAt,
          }
        })
        saveToLocal.set('valine-newest-comments', JSON.stringify(valineArray), 10/(60*24))
        generateHtml(valineArray)
      }).catch(e => {
        const $dom = document.querySelector('#card-newest-comments .aside-list')
        $dom.innerHTML= "无法获取评论，请确认相关配置是否正确"
      }) 
  }

  const newestCommentInit = () => {
    if (document.querySelector('#card-newest-comments .aside-list')) {
      const data = saveToLocal.get('valine-newest-comments')
      if (data) {
        generateHtml(JSON.parse(data))
      } else {
        getComment()
      }
    }
  }

  newestCommentInit()
  document.addEventListener('pjax:complete', newestCommentInit)
})</script><div><canvas id="snow" style="position:fixed;top:0;left:0;width:100%;height:100%;z-index:99999;pointer-events:none"></canvas></div><script>const notMobile = (!(navigator.userAgent.match(/(phone|pad|pod|iPhone|iPod|ios|iPad|Android|Mobile|BlackBerry|IEMobile|MQQBrowser|JUC|Fennec|wOSBrowser|BrowserNG|WebOS|Symbian|Windows Phone)/i)));</script><script async type="text/javascript" src="https://cdn.jsdelivr.net/gh/Candinya/Kratos-Rebirth@latest/source/js/snow.min.js"></script><script defer="defer" id="fluttering_ribbon" mobile="false" src="https://cdn.jsdelivr.net/npm/butterfly-extsrc/dist/canvas-fluttering-ribbon.min.js"></script><script id="canvas_nest" defer="defer" color="0,0,255" opacity="0.7" zIndex="-1" count="99" mobile="false" src="https://cdn.jsdelivr.net/npm/butterfly-extsrc/dist/canvas-nest.min.js"></script><script src="https://cdn.jsdelivr.net/npm/butterfly-extsrc/dist/activate-power-mode.min.js"></script><script>POWERMODE.colorful = true;
POWERMODE.shake = true;
POWERMODE.mobile = false;
document.body.addEventListener('input', POWERMODE);
</script><script id="click-show-text" src="https://cdn.jsdelivr.net/npm/butterfly-extsrc/dist/click-show-text.min.js" data-mobile="false" data-text="I,LOVE,YOU" data-fontsize="15px" data-random="false" async="async"></script><div class="aplayer" data-id="7455077979" data-server="tencent" data-type="playlist" data-fixed="true" data-listFolded="false" data-order="random" data-preload="none"></div><link rel="stylesheet" href="https://cdn.bootcss.com/aplayer/1.10.1/APlayer.min.css"><script src="https://cdn.bootcss.com/aplayer/1.10.1/APlayer.min.js"></script><script src="https://cdn.jsdelivr.net/npm/meting@1.2.0/dist/Meting.min.js"></script><script src="https://cdn.jsdelivr.net/npm/pjax/pjax.min.js"></script><script>let pjaxSelectors = ["head > title","#config-diff","#body-wrap","#rightside-config-hide","#rightside-config-show",".js-pjax"]

var pjax = new Pjax({
  elements: 'a:not([target="_blank"])',
  selectors: pjaxSelectors,
  cacheBust: false,
  analytics: false,
  scrollRestoration: false
})

document.addEventListener('pjax:send', function () {

  // removeEventListener scroll 
  window.tocScrollFn && window.removeEventListener('scroll', window.tocScrollFn)
  window.scrollCollect && window.removeEventListener('scroll', scrollCollect)

  document.getElementById('rightside').style.cssText = "opacity: ''; transform: ''"
  
  if (window.aplayers) {
    for (let i = 0; i < window.aplayers.length; i++) {
      if (!window.aplayers[i].options.fixed) {
        window.aplayers[i].destroy()
      }
    }
  }

  typeof typed === 'object' && typed.destroy()

  //reset readmode
  const $bodyClassList = document.body.classList
  $bodyClassList.contains('read-mode') && $bodyClassList.remove('read-mode')

  typeof disqusjs === 'object' && disqusjs.destroy()
})

document.addEventListener('pjax:complete', function () {
  window.refreshFn()

  document.querySelectorAll('script[data-pjax]').forEach(item => {
    const newScript = document.createElement('script')
    const content = item.text || item.textContent || item.innerHTML || ""
    Array.from(item.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value))
    newScript.appendChild(document.createTextNode(content))
    item.parentNode.replaceChild(newScript, item)
  })

  GLOBAL_CONFIG.islazyload && window.lazyLoadInstance.update()

  typeof chatBtnFn === 'function' && chatBtnFn()
  typeof panguInit === 'function' && panguInit()

  // google analytics
  typeof gtag === 'function' && gtag('config', '', {'page_path': window.location.pathname});

  // baidu analytics
  typeof _hmt === 'object' && _hmt.push(['_trackPageview',window.location.pathname]);

  typeof loadMeting === 'function' && document.getElementsByClassName('aplayer').length && loadMeting()

  // prismjs
  typeof Prism === 'object' && Prism.highlightAll()
})

document.addEventListener('pjax:error', (e) => {
  if (e.request.status === 404) {
    pjax.loadUrl('/404.html')
  }
})</script><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script></div><script src="/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script><script>L2Dwidget.init({"pluginRootPath":"live2dw/","pluginJsPath":"lib/","pluginModelPath":"assets/","tagMode":false,"debug":false,"model":{"jsonPath":"/live2dw/assets/koharu.model.json"},"display":{"position":"left","width":100,"height":200},"mobile":{"show":true},"rect":{"opacity":0.5},"log":false});</script></body></html>